import os
import tempfile
import unittest

import pyarrow as pa
import pyarrow.parquet as pq


# A Parquet file is immutable once written: it cannot be appended to or modified in place.
class TestAccessParquet(unittest.TestCase):
    """Round-trip tests for writing and reading a Parquet file with pyarrow.

    Every test writes its own file inside a fresh temporary directory, so the
    tests pass in any execution order and leave nothing behind on disk.
    """

    # Number of one-row batches written by ``test_write_parquet``.
    WRITE_BATCH_COUNT = 50000
    # Smaller count used where only read-back correctness is being checked.
    READ_BATCH_COUNT = 100

    def setUp(self) -> None:
        """Create a scratch file path and the one-row batch used by the tests."""
        tmpdir = tempfile.TemporaryDirectory()
        # Registered before anything else can fail so the directory is always
        # removed, even when the test body raises.
        self.addCleanup(tmpdir.cleanup)
        self.file = os.path.join(tmpdir.name, 'test.parquet')
        self.names = ['name', 'content']
        self.data = [
            pa.array(['name']),
            pa.array(['content']),
        ]
        self.batch = pa.record_batch(self.data, self.names)

    def _write_batches(self, count: int) -> None:
        """Write ``self.batch`` to ``self.file`` ``count`` times."""
        with pq.ParquetWriter(self.file, self.batch.schema) as writer:
            for _ in range(count):
                writer.write_batch(self.batch)

    def test_write_parquet(self):
        """Writing N one-row batches produces a file holding N rows."""
        self._write_batches(self.WRITE_BATCH_COUNT)

        # Only the footer is inspected; the data pages are not decoded.
        metadata = pq.read_metadata(self.file)
        self.assertEqual(metadata.num_rows, self.WRITE_BATCH_COUNT)
        self.assertEqual(metadata.num_columns, len(self.names))

    def test_read_parquet(self):
        """Reading a written file returns the same columns and values."""
        # Write the input here instead of relying on test_write_parquet:
        # unittest orders methods alphabetically, so the read test runs first.
        self._write_batches(self.READ_BATCH_COUNT)

        table = pq.read_table(self.file)

        expected = {
            name: column.to_pylist() * self.READ_BATCH_COUNT
            for name, column in zip(self.names, self.data)
        }
        self.assertEqual(table.column_names, self.names)
        self.assertEqual(table.num_rows, self.READ_BATCH_COUNT)
        self.assertEqual(table.to_pydict(), expected)


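# Attempted append (kept for reference): opening an existing path with
# ParquetWriter overwrites the file instead of appending to it.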
# with pq.ParquetWriter('test.parquet', batch.schema) as w:
#     w.write_batch(batch)
